package com.b2s.scrumblr.kb.core.util;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;

/**
 * Static helpers for working with HTML text.
 */
public class HtmlUtil {

    /**
     * Removes HTML markup from the given text by streaming it through Lucene's
     * {@link HTMLStripCharFilter} and collecting whatever characters the filter emits.
     *
     * @param html the HTML text to strip; may be {@code null}
     * @return the filtered text, or an empty string when {@code html} is {@code null},
     *         empty, or whitespace-only
     * @throws IOException if the filter fails while reading or closing
     */
    public static String stripTag(String html) throws IOException {
        if (StringUtils.isBlank(html)) {
            return "";
        }
        // Read the characters straight from the String. Going through
        // getBytes()/InputStreamReader with the platform default charset would
        // corrupt any character that charset cannot represent.
        HTMLStripCharFilter filter = new HTMLStripCharFilter(CharReader.get(new StringReader(html)));
        try {
            // The input length is used as the initial capacity to limit buffer regrowth.
            StringBuilder sb = new StringBuilder(html.length());
            int ch;
            while ((ch = filter.read()) != -1) {
                sb.append((char) ch);
            }
            return sb.toString();
        } finally {
            // Closing the filter also closes the wrapped reader.
            filter.close();
        }
    }
}
